* Start from a clean session
clear all
set more off
set maxvar 15000
clear matrix


	* Pooled survey data, restricted to the baseline sample
	use "$Mydirectory1/3_Output/2_PooledData_analysis.dta", clear
    keep if baseline_sample==1

    * The rest of the script refers to the weight simply as weight
    rename wgt_sex_race weight

    * Empty placeholders that later receive the estimates.
    * Suffix 1 is filled by the baseline regression and suffix 2 by the
    * multiple-imputation regression further down.
    forvalues i = 1/2 {
        foreach stub in est est_lb est_ub cov var_x var_y {
            generate `stub'_`i' = .
        }
    }

    * Stash the survey sample so it can be appended to each Census file
    tempfile surveys
    save `surveys'
    
*-----------------------------------------
* MULTIPLE IMPUTATIONS
*-----------------------------------------

*---------------------------------------*
/* BRING IN EARLY-COHORT MULTIPLE-IMPUTATION
   RESULTS --- 1910-1930 */
*---------------------------------------*
    
	* Previously saved imputation results; only rows with decade before 1940
	* are retained here
	use "$Mydirectory2/appendix_b/imputed_values_results_1940_1936fix.dta", clear

    * Missing decade compares as larger than any number, so it is dropped too
    drop if decade >= 1940

    * Held aside and appended to the new results at the end of the script
    tempfile early
    save `early'


     foreach x1 in 4 5 6 7  {
        * One pass per cohort decade (1940 through 1970):
        *   1. load the Census file for the following decade and append the surveys
        *   2. keep only occupation-race-region triplets present in both sources
        *   3. run the baseline regression on the survey observations
        *   4. multiply impute father income from the triplet dummies and rerun
        *   5. save a one-row results file for this cohort

        * The Census used is always one decade after the cohort
        * (cohort 1940 uses the 1950 Census, ..., cohort 1970 uses the 1980 Census)
            local x2 = `x1' + 1

            noisily display "cohort 19`x1'0"

        * Bring in corresponding Census and clean up some variables 
            use "$CensusData/output/Census19`x2'0_fathers_ages30to50.dta", clear

        * Rename a couple variables
        * For the 1940 cohort the weight comes from slwt and the father income
        * measure is cloned from log_father_inctot; otherwise the weight is perwt
            if "`x1'"=="4"  {
                clonevar log_father_hh_income = log_father_inctot
                rename slwt weight
            }
            else {
                rename perwt weight
            }

        * Re-center weight to have mean 1
        * r(mean) is used directly so no precision is lost through a macro, and
        * the check uses a relative tolerance because the rescaled mean is only
        * equal to 1 up to floating-point rounding
            quietly summarize weight, meanonly
            replace weight = weight/r(mean)
            quietly summarize weight, meanonly
            assert reldif(r(mean), 1) < 1e-6

        * Append surveys 
            append using `surveys'
            replace census=0 if census==.

        * Keep relevant sample 
            keep if census==1 | (census==0 & decade==19`x1'0)
            tab census

        * keep triplets in both survey and census 
            drop if fatheroccej==99
            egen triplet= group(fatheroccej race south_merge)

            * tag marks one row per triplet-by-source cell, so the sum within a
            * triplet counts how many of the two sources contain it
            bysort triplet census: gen tag = _n==1
            bysort triplet: egen number_surveys = sum(tag)
            keep if number_surveys==2
            tab census

        * Make dummy variables for existing triplets
        * The group id is rebuilt so it is consecutive over the remaining triplets
            drop triplet
            egen triplet= group(fatheroccej race south_merge)
            quietly tab triplet, gen(trip_)

        tempfile restricted
        save `restricted'

        **-----------------------

        local father_inc "log_father_closest_census_v2"

        * Baseline regression     
            //summary stats 
            sum `father_inc' if census==0 [aw=weight], d
            local var_y = `r(Var)'

            sum log_son_baseline if census==0 [aw=weight], d
            local var_x = `r(Var)'

            corr log_son_baseline `father_inc' if census==0 [aw=weight], c
            local cov_baseline = `r(cov_12)'

            //Baseline regression using the imputation from nearest Census
            reg log_son_baseline `father_inc' if census==0 [aw=weight], robust

            * Point estimate with a normal-approximation 95 percent interval
            replace est_1 = _b[`father_inc']
            replace est_ub_1 = _b[`father_inc']+1.96*_se[`father_inc']
            replace est_lb_1 = _b[`father_inc']-1.96*_se[`father_inc']

            replace cov_1 = `cov_baseline'
            replace var_x_1 = `var_x'
            replace var_y_1 = `var_y'

        **-----------------------

        * Create results dataset
        * Every row holds the same values at this point, so one row is enough
            keep est_* cov_* var_x_* var_y_* 
            keep if _n==1

            tempfile results
            save `results'

        **-----------------------

        * Now do multiple imputations  
            noisily display "Multiple imputation time"

            use `restricted', clear
            local dad_measure "log_father_hh_income"

            sum log_son_baseline if census==0 [aw=weight], d
            local var_x = `r(Var)'

            mi set wide
            mi register imputed `dad_measure'

            * Fixed seed keeps the imputations reproducible across runs
            local reps "100"
            mi impute regress `dad_measure' trip_* [aw=weight], add(`reps') rseed(819016)

        * Grab the variances from each imputation 
        * The variance of imputation j is parked in row j, which relies on the
        * data having at least as many rows as there are imputations
            gen variance=.
            forval j=1(1)`reps' {
                quietly sum _`j'_`dad_measure' if census==0 [aw=weight], d
                quietly replace variance = `r(Var)' if _n==`j'
            }
            egen avg_variance = mean(variance) //avg variance across imputations 
            * avg_variance is constant across rows, so this mean just reads it back
            sum avg_variance if census==0 [aw=weight]
            local var_yp = `r(mean)'

            mi estimate, post: reg log_son_baseline `dad_measure' if census==0 [aw=weight], robust

            * Column 1 of r(table) is the father income coefficient;
            * row 1 is the estimate, rows 5 and 6 the interval bounds
            mat m= r(table)
            mat list m
            scalar coeff1=m[1,1]
            scalar lb = m[5,1]
            scalar ub = m[6,1]

            scalar cov_imp= coeff1*`var_yp' //Back out covariance

        **------------------------------------

        * Save estimates in "results" dataset 
            use `results', clear

            replace est_2 = coeff1
            replace est_ub_2 = ub
            replace est_lb_2 = lb

            replace cov_2 = cov_imp
            replace var_x_2 = `var_x'
            replace var_y_2 = `var_yp'

            gen decade = 1900 + (`x1'*10)

            * One results file per cohort, appended together after the loop
            tempfile results_`x1'
            save `results_`x1''

     }

    * Append results together      
        * Stack the four one-row cohort files, 1940 first
        use `results_4', clear
        append using `results_5' `results_6' `results_7'

        * Go from one row per cohort to one row per cohort and estimate type
        * (estimate 1 is the baseline, estimate 2 the multiple imputation)
        reshape long est_ est_lb_ est_ub_ cov_ var_x_ var_y_, i(decade) j(estimate)

        * Shift the multiple-imputation rows two years to the right of the
        * baseline rows for the same cohort
        replace decade = decade + 2 if estimate == 2

        * Add the early-cohort results saved above and order the rows
        append using `early'
        sort decade estimate

        compress
        save "$Mydirectory2/appendix_b/MI_output_post1940.dta", replace
        
    